DA analysis with Milo
We test for differential abundance between healthy and cirrhotic livers. We start by defining neighbourhoods with refined sampling on the KNN graph. We inspect the size of neighbourhoods.
nhoodDistances(liver_milo)
named list()
Then we make a design matrix for the differential test, assigning samples to conditions.
liver_meta <- as.tibble(colData(liver_milo)[,c("dataset","condition")])
`as.tibble()` is deprecated as of tibble 2.0.0.
Please use `as_tibble()` instead.
The signature and semantics have changed, see `?as_tibble`.
[90mThis warning is displayed once every 8 hours.[39m
[90mCall `lifecycle::last_warnings()` to see where this warning was generated.[39m
liver_meta <- distinct(liver_meta) %>%
mutate(condition=factor(condition, levels=c("Uninjured", "Cirrhotic"))) %>%
column_to_rownames("dataset")
Now we can count cells in neighbourhoods and perform the DA test.
milo_res <- testNhoods(liver_milo, design = ~ condition, design.df = liver_meta, fdr.weighting = "k-distance")
Performing spatial FDR correction withk-distance weighting
write_csv(milo_res,"/nfs/team205/ed6/data/Ramachandran2019_liver/liver_results_20201008.csv")
cannot open file '/nfs/team205/ed6/data/Ramachandran2019_liver/liver_results_20201008.csv': Operation not permittedError in open.connection(path, "wb") : cannot open the connection
Parsed with column specification:
cols(
logFC = [32mcol_double()[39m,
logCPM = [32mcol_double()[39m,
F = [32mcol_double()[39m,
PValue = [32mcol_double()[39m,
FDR = [32mcol_double()[39m,
Nhood = [32mcol_double()[39m,
SpatialFDR = [32mcol_double()[39m,
annotation_indepth = [31mcol_character()[39m,
annotation_indepth_fraction = [32mcol_double()[39m
)
Exploration of DA results
We can start by looking at some basic stats

The distribution of P-values looks sensible and from the volcano plot we can see that we have identified some DA neighbourhoods at 10% FDR. We can visualize DA neighbourhoods building an abstracted graph
liver_milo <- buildNhoodGraph(liver_milo)
plotNhoodGraphDA(liver_milo, milo_res, alpha = 0.05)
Making figures for the manuscript
colourCount = length(unique(liver_milo$annotation_lineage))
getPalette = colorRampPalette(brewer.pal(9, "Spectral"))
umap_df <- data.frame(reducedDim(liver_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")
umap1 <- cbind(umap_df, annotation_lineage=liver_milo$annotation_lineage) %>%
ggplot(aes(UMAP_1, UMAP_2, color=as.character(annotation_lineage))) +
geom_point(size=0.1, alpha=0.5) +
ggrepel::geom_text_repel(data = . %>%
group_by(annotation_lineage) %>%
summarise(UMAP_1=mean(UMAP_1), UMAP_2=mean(UMAP_2)),
aes(label=annotation_lineage), color="black", size=6
) +
scale_color_manual(values=getPalette(colourCount)) +
guides(color="none") +
xlab("UMAP1") + ylab("UMAP2") +
coord_fixed() +
theme_classic(base_size = 22) +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
legend.text = element_text(size=18), legend.title = element_text(size=20))
umap2 <-
cbind(umap_df, condition=as.character(liver_milo$condition)) %>%
ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
geom_point(size=0.1, alpha=0.5) +
scale_color_brewer(palette="Set1", name='') +
xlab("UMAP1") + ylab("UMAP2") +
coord_fixed() +
guides(color='none') +
facet_wrap(condition~., ncol=1) +
theme_nothing() +
theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
strip.background = element_rect(color=NA), strip.text = element_text(size=16))
theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
legend.text = element_text(size=18), legend.title = element_text(size=20))
List of 5
$ axis.text : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ axis.ticks : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ legend.text :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : num 18
..$ hjust : NULL
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ legend.title :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : num 20
..$ hjust : NULL
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ legend.position: num [1:2] 0.9 0.9
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
nh_graph_pl <- plotNhoodGraphDA(liver_milo, milo_res, alpha = 0.1) +
theme(legend.text = element_text(size=22), legend.title = element_text(size=24)) +
coord_fixed()
invalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generated
fig4_top <- (umap1 | umap2 | nh_graph_pl) +
plot_layout(widths = c(3,1,3))
fig4_top +
ggsave("~/milo_output/liver_embedding.pdf", width=15, height = 10)

Next, we can check the cell types where we observe most differences between healthy and cirrhotic cells, by taking the most frequent cell type in each neighbourhood.
#' Add annotation of most frequent cell type per nhood to milo results table
add_nhood_coldata_to_res <- function(milo, milo_res, coldata_col){
nhood_counts <- sapply(seq_along(nhoods(milo)), function(x) table(colData(milo)[as.vector(nhoods(milo)[[x]]), coldata_col]))
nhood_counts <- t(nhood_counts)
rownames(nhood_counts) <- seq_along(nhoods(milo))
max_val <- apply(nhood_counts, 1, function(x) colnames(nhood_counts)[which.max(x)])
max_frac <- apply(nhood_counts, 1, function(x) max(x)/sum(x))
milo_res[coldata_col] <- max_val
milo_res[paste0(coldata_col, "_fraction")] <- max_frac
return(milo_res)
}
milo_res <- add_nhood_coldata_to_res(liver_milo, milo_res, "annotation_indepth")
milo_res$annotation_lineage.x <- NULL
milo_res$annotation_lineage.y <- NULL
milo_res$annotation_lineage <- NULL
anno_df <- data.frame(liver_milo@colData) %>%
distinct(annotation_lineage, annotation_indepth)
milo_res <- left_join(milo_res, anno_df, by="annotation_indepth")
We first check that neighbourhoods are quite homogeneous

I can recover all the clusters where DA was detected in the original paper (see all the barplots for each lineage) and more! All in a single analysis, and without knowing where the subclusters are. Let’s bear in mind that positive logFC –> more cirrhotic, negative logFC —> more healthy
paper_DA <- list(cirrhotic=c("MPs (4)","MPs (5)",
"Endothelia (6)", "Endothelia (7)",
"Mes (3)",
"Tcells (2)",
"Myofibroblasts"
),
healthy=c("MPs (7)",
"Endothelia (1)",
"Tcells (1)", "Tcells (3)","Tcells (1)",
"ILCs (1)"
)
)
expDA_df <- bind_rows(
data.frame(annotation_indepth = paper_DA[["cirrhotic"]], pred_DA="cirrhotic"),
data.frame(annotation_indepth = paper_DA[["healthy"]], pred_DA="healthy")
)
pl1 <- milo_res %>%
left_join(expDA_df) %>%
mutate(is_signif = ifelse(SpatialFDR < 0.1, 1, 0)) %>%
mutate(logFC_color = ifelse(is_signif==1, logFC, NA)) %>%
arrange(annotation_lineage) %>%
mutate(Nhood=factor(Nhood, levels=unique(Nhood))) %>%
ggplot(aes(annotation_indepth, logFC, color=logFC_color)) +
scale_color_gradient2() +
guides(color="none") +
xlab("annotation") + ylab("Log Fold Change") +
ggbeeswarm::geom_quasirandom(alpha=1) +
coord_flip() +
facet_grid(annotation_lineage~., scales="free", space="free") +
theme_bw(base_size=22) +
theme(strip.text.y = element_text(angle=0),
axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
)
Joining, by = "annotation_indepth"
pl2 <- milo_res %>%
left_join(expDA_df) %>%
# dplyr::filter(!is.na(pred_DA)) %>%
group_by(annotation_indepth) %>%
summarise(pred_DA=dplyr::first(pred_DA), annotation_lineage=dplyr::first(annotation_lineage)) %>%
mutate(end=ifelse(pred_DA=="healthy", 0, 1),
start=ifelse(pred_DA=="healthy", 1, 0)) %>%
ggplot(aes(annotation_indepth, start, xend = annotation_indepth, yend = end, color=pred_DA)) +
geom_segment(size=1,arrow=arrow(length = unit(0.1, "npc"), type="closed")) +
coord_flip() +
xlab("annotation") +
facet_grid(annotation_lineage~.,
# annotation_lineage~"Ramachandran et al.\nDA predictions",
scales="free", space="free") +
# guides(color="none") +
scale_color_brewer(palette="Set1", direction = -1,
labels=c("enriched in cirrhotic", "enriched in healthy"),
na.translate = F,
name="Ramachandran et al.\nDA predictions") +
guides(color=guide_legend(ncol = 1)) +
theme_bw(base_size=22) +
ylim(-0.1,1.1) +
theme(strip.text.y = element_blank(),strip.text.x = element_text(angle=90),
plot.margin = unit(c(0,0,0,0), "cm"), panel.grid = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
legend.position = "bottom")
Joining, by = "annotation_indepth"
`summarise()` ungrouping output (override with `.groups` argument)
fig4_bleft <- (pl2 + pl1 +
plot_layout(widths=c(1,10), guides = "collect") & theme(legend.position = 'top', legend.justification = 0))
fig4_bleft +
ggsave("~/milo_output/liver_DAcomparison.pdf", width=8, height = 13)

Close-up on Endothelial lineage
# endo_milo <- scater::runPCA(endo_milo, subset_row=endo_hvgs, ncomponents=11)
endo_milo <- scater::runUMAP(liver_milo[,liver_milo$annotation_lineage=="Endothelia"], dimred='PCA')
umap_df <- data.frame(reducedDim(endo_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")
endo_umap <- cbind(umap_df, condition=endo_milo$condition) %>%
ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
geom_point(size=0.3, alpha=0.5) +
scale_color_brewer(palette="Set1", name='') +
xlab("UMAP1") + ylab("UMAP2") +
coord_fixed() +
guides(color="none") +
facet_wrap(condition~., ncol=1) +
theme_nothing() +
theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
strip.background = element_rect(color=NA), strip.text = element_text(size=16))
endo_umap

liver_milo2 <- liver_milo
subset.nhoods <- str_detect(milo_res$annotation_indepth, "Endo")
reducedDim(liver_milo2, "UMAP")[colnames(endo_milo),] <- reducedDim(endo_milo, "UMAP")
endo_gr <-
plotNhoodGraphDA(
liver_milo2, milo_res,
subset.nhoods = subset.nhoods,
# subset.nhoods =subset.nhoods[1:(length(subset.nhoods)-1)],
alpha = 0.1
) +
theme(legend.text = element_text(size=22), legend.title = element_text(size=24))
invalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generatedinvalid factor level, NA generated
fig4_bright1 <- ((endo_umap + endo_gr ) +
plot_layout(widths = c(1,2),
guides = "collect"
))
fig4_bright1 +
ggsave("~/milo_output/liver_endoGraph.pdf", width=9, height = 5)

Differential expression between DA neighbourhoods
I want to merge overlapping nhoods with significant DA and the same direction of logFC, and then test for differential expression between cells in up and down nhoods (I guess you could also do up or down VS all the rest). This allows us to perform a comparison without further clustering.
endo_milo <- liver_milo[,which(liver_milo$annotation_lineage=="Endothelia")]
endo_res <- dplyr::filter(milo_res, annotation_lineage=="Endothelia")
rownames(endo_res) <- rownames(milo_res)[which(milo_res$annotation_lineage=="Endothelia")]
## keep HV genes expressed in at least 5% of endothelial cells
endo_hvg_cnts <- counts(liver_milo)[hvgs,liver_milo$annotation_lineage=="Endothelia"]
Error in counts(liver_milo)[hvgs, liver_milo$annotation_lineage == "Endothelia"] :
object 'hvgs' not found
# rownames(milo_res) <- names(nhoods(liver_milo)) ## If I don't set the nhood index as name the grouping doesn't
nhood_markers <- findNhoodMarkers(liver_milo, milo_res, overlap=1, assay="logcounts", return.groups = TRUE,
subset.row = endo_hvgs,
subset.nhoods = str_detect(milo_res$annotation_indepth, "Endo"))
Found 1335 DA neighbourhoods at FDR 10%
Nhoods aggregated into 2 groups
sum(nhood_markers$dge$adj.P.Val_1 < 0.01)
[1] 572
marker_genes <- nhood_markers$dge %>%
dplyr::filter(adj.P.Val_1 < 0.01) %>%
pull(GeneID)
liver_milo <- calcNhoodExpression(liver_milo, subset.row = marker_genes)
highlight_genes <- c("PLVAP", "SOX14", "VWA1", "ACKR1",
"CLEC4G", "CLEC4M", "STAB2", "MRC1",
"CD14", "CCL21", "SOX17", "WNT2", "RSPO3", "AIF1L",
"PROX1", "PDPN","CPE", "CD320")
fig4_bbright <- plotNhoodExpressionDA(liver_milo, milo_res, marker_genes, cluster_features = TRUE,
alpha = 0.1,
scale_to_1 = TRUE,
subset.nhoods = milo_res$annotation_lineage=="Endothelia",
highlight_features = highlight_genes, show_rownames = FALSE
) +
ylab("DE genes") +
theme(legend.text = element_text(size=22), legend.title = element_text(size=24))
Some elements of highlight_features are not in features and will not be highlighted.
Missing features: PLVAP, SOX14, CD14, CCL21, SOX17, WNT2, RSPO3, AIF1L, PDPN, CPE
Customize plotting for paper figure
x <- liver_milo
da.res <- milo_res
cluster_features = TRUE
alpha = 0.1
scale_to_1 = TRUE
subset.nhoods = milo_res$annotation_lineage=="Endothelia"
features <- marker_genes
expr_mat <- nhoodExpression(x)[features,]
colnames(expr_mat) <- 1:length(nhoods(x))
## Get nhood expression matrix
if (!is.null(subset.nhoods)) {
expr_mat <- expr_mat[,subset.nhoods, drop=FALSE]
}
if (!isFALSE(scale_to_1)) {
expr_mat <- t(apply(expr_mat, 1, function(x) (x - min(x))/(max(x)- min(x))))
}
rownames(expr_mat) <- sub(pattern = "-", replacement = ".", rownames(expr_mat)) ## To avoid problems when converting to data.frame
pl_df <- data.frame(t(expr_mat)) %>%
rownames_to_column("Nhood") %>%
mutate(Nhood=as.double(Nhood)) %>%
left_join(da.res, by="Nhood") %>%
mutate(logFC_rank=percent_rank(logFC))
## Top plot: nhoods ranked by DA log FC
pl_top <- pl_df %>%
mutate(is_signif = ifelse(SpatialFDR < alpha, paste0("SpatialFDR < ", alpha), NA)) %>%
ggplot(aes(logFC_rank, logFC)) +
geom_hline(yintercept = 0, linetype=2) +
geom_point(size=0.2, color="grey") +
geom_point(data=. %>% dplyr::filter(!is.na(is_signif)), aes(color=is_signif), size=1) +
theme_bw(base_size=22) +
ylab("DA logFC") +
scale_color_manual(values="red", name="") +
guides(color=guide_legend(override.aes = list(size=2))) +
scale_x_continuous(expand = c(0.01, 0)) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
## Bottom plot: gene expression heatmap
if (isTRUE(cluster_features)) {
row.order <- hclust(dist(expr_mat))$order # clustering
ordered_features <- rownames(expr_mat)[row.order]
} else {
ordered_features <- rownames(expr_mat)
}
# detach('package:EnsDb.Hsapiens.v86')
# detach('package:ensembldb')
highlight_genes <- c("PLVAP", "SOX14", "VWA1", "ACKR1",
"CLEC4G", "CLEC4M", "STAB2", "MRC1",
"CD14", "CCL21", "SOX17", "WNT2", "RSPO3", "AIF1L",
"PROX1", "PDPN","CPE", "CD320")
pl_bottom <- pl_df %>%
pivot_longer(cols=rownames(expr_mat), names_to='feature', values_to="avg_expr") %>%
mutate(feature=factor(feature, levels=ordered_features)) %>%
mutate(label=ifelse(feature %in% highlight_genes, as.character(feature), NA)) %>%
ggplot(aes(logFC_rank, feature, fill=avg_expr)) +
geom_tile() +
scale_fill_viridis_c(option="magma", name="Scaled \nexpression") +
xlab("Endothelial Neighbourhoods") + ylab("DE genes") +
scale_x_continuous(expand = c(0.01, 0),
# limits = c(0, max(pl_df$logFC_rank) + 0.2)
) +
coord_cartesian(clip="off") +
ggrepel::geom_text_repel(data=. %>% dplyr::filter(!is.na(label)) %>%
group_by(label) %>%
summarise(logFC_rank=max(logFC_rank), avg_expr=mean(avg_expr), feature=dplyr::first(feature)),
aes(label=label, x=logFC_rank), size=7, xlim = c(max(pl_df$logFC_rank) + 0.01, max(pl_df$logFC_rank) + 0.02),
min.segment.length = 0, seed=42
) +
theme_classic(base_size = 22) +
theme(axis.text.x = element_blank(), axis.line.x = element_blank(), axis.ticks.x = element_blank(),
axis.text.y = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank(),
panel.spacing = margin(2, 2, 2, 2, "cm"),
legend.margin=margin(0,0,0,0),
legend.box.margin=margin(10,10,10,10)
)
## Assemble plot
fig4_bbright <- (pl_top / pl_bottom) + plot_layout(heights = c(1,4), guides = "collect") &
theme(legend.justification=c(0, 1))
fig4_bbright + ggsave("~/milo_output/liver_DEG_heatmap.pdf", width=9, height = 9)

Quick GO term analysis
# install.packages("enrichR")
library(enrichR)
dbs <- listEnrichrDbs()
websiteLive <- TRUE
if (is.null(dbs)) websiteLive <- FALSE
if (websiteLive) head(dbs)
dbs <- c("GO_Molecular_Function_2015", "GO_Cellular_Component_2015", "GO_Biological_Process_2015")
go_marker_genes <- enrichr(marker_genes, dbs)
go_all_genes <- enrichr(endo_hvgs, dbs)
ref_terms <- go_all_genes$GO_Biological_Process_2015$Term[go_all_genes$GO_Biological_Process_2015$Adjusted.P.value < 0.01]
go_plot <- go_marker_genes$GO_Biological_Process_2015[go_marker_genes$GO_Biological_Process_2015$Adjusted.P.value < 0.01,] %>%
dplyr::filter(!Term %in% ref_terms) %>%
top_n(30, -log10(Adjusted.P.value)) %>%
mutate(Term=factor(Term, levels=rev(unique(Term)))) %>%
ggplot(aes(Term, -log10(Adjusted.P.value))) +
geom_point() +
coord_flip() +
xlab("GO Biological Function") + ylab("-log10(Adj. p-value)") +
theme_bw(base_size=14)
go_plot +
ggsave("~/milo_output/liver_endoDEG_GOenrich.pdf", width=9, height = 5)
Assemble figure
fig4_top /
((fig4_bleft | (fig4_bright1 / fig4_bbright) + plot_layout(heights = c(1,1.5))) +
plot_layout(widths=c(1,1.2))) +
plot_layout(heights=c(1,1.5)) +
ggsave("~/milo_output/fig4.pdf", height = 26, width = 22, useDingbats=FALSE)

NA
Assemble supplementary figure


---
title: "Milo: liver cirrhosis analysis"
output: html_notebook
---

## Introduction

In this notebook we demonstrate how to use Milo to detect abherrant cell states in diseased tissues, using a dataset of hepatic non-parenchymal cells isolated from 5 healthy and 5 cirrhotic human livers. [Ramachandran et al. 2019](https://www.nature.com/articles/s41586-019-1631-3#Sec1) (GEO accessiion: GSE136103).

```{r}
suppressPackageStartupMessages({
  library(tidyverse)
  library(irlba)
  library(DropletUtils)
  library(scater)
  library(scran)
  library(Seurat) ## just 4 loading the object
  library(miloR)
  library(SingleCellExperiment)
  library(patchwork)
  library(igraph)
  })
```

## Load data

We downloaded the dataset and annotations stored in Seurat object from [here](https://datashare.is.ed.ac.uk/handle/10283/3433), as indicated by the authors.

```{r}
load("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue.rdata")

## Convert to SingleCellExperiment
liver_sce <- SingleCellExperiment(assay = list(counts=tissue@raw.data, logcounts=tissue@data), 
                                  colData = tissue@meta.data)

liver_sce
```

## Preprocessing 

### Feature selection

```{r}
dec_liver <- modelGeneVar(liver_sce)

fit_liver <- metadata(dec_liver)
plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression",
    ylab="Variance of log-expression")

hvgs <- getTopHVGs(dec_liver, n=3000)
```

### Dimensionality reduction with PCA

```{r, fig.height=14, fig.width=14}
liver_sce <- runPCA(liver_sce, subset_row=hvgs, ncomponents=11)

plotPCA(liver_sce, colour_by="condition", ncomponents=3)
```

```{r, fig.height=8, fig.width=8}
liver_sce <- runUMAP(liver_sce, dimred="PCA", ncomponents=2)

scater::plotUMAP(liver_sce, colour_by="condition", point_alpha=1,  point_size=0.5) 
scater::plotUMAP(liver_sce, colour_by="dataset", point_alpha=0.3,  point_size=0.5)
scater::plotUMAP(liver_sce, colour_by="annotation_lineage", point_alpha=0.3,  point_size=0.5, text_by='annotation_lineage')
# scater::plotUMAP(liver_sce, colour_by='annotation_indepth', point_alpha=0.3,  point_size=0.5, text_by='annotation_indepth')
```

Notably, this dataset doesn't appear to display a batch effect

## DA analysis with Milo

We test for differential abundance between healthy and cirrhotic livers. We start by defining neighbourhoods with refined sampling on the KNN graph. We inspect the size of neighbourhoods.

```{r}
liver_milo <- Milo(liver_sce)

## Build KNN graph
liver_milo <- buildGraph(liver_milo, d = 11, k=30)

## Compute neighbourhoods with refined sampling
liver_milo <- makeNhoods(liver_milo, k=30, d=11, prop = 0.05, refined=TRUE)
plotNhoodSizeHist(liver_milo, bins=150)
```

Then we make a design matrix for the differential test, assigning samples to conditions.

```{r}
liver_meta <- as.tibble(colData(liver_milo)[,c("dataset","condition")]) 
liver_meta <- distinct(liver_meta) %>%
  mutate(condition=factor(condition, levels=c("Uninjured", "Cirrhotic"))) %>%
  column_to_rownames("dataset")
```

Now we can count cells in neighbourhoods and perform the DA test.

```{r}
liver_milo <- countCells(liver_milo, samples = "dataset", meta.data = data.frame(colData(liver_milo)[,c("dataset","condition")]) )

liver_milo <- calcNhoodDistance(liver_milo, d=11)
milo_res <- testNhoods(liver_milo, design = ~ condition, design.df = liver_meta, fdr.weighting = "k-distance")
```

```{r}
## Save milo object and results
saveRDS(liver_milo,"/nfs/team205/ed6/data/Ramachandran2019_liver/tissue_milo.RDS")
saveRDS(liver_milo,"~/liver_milo_20201008.RDS")
write_csv(milo_res, "/nfs/team205/ed6/data/Ramachandran2019_liver/milo_results.csv")
write_csv(milo_res,"/nfs/team205/ed6/data/Ramachandran2019_liver/liver_results_20201008.csv")
```
```{r, echo=FALSE}
liver_milo <- readRDS("~/liver_milo_20201008.RDS")
milo_res <- read_csv("~/liver_results_20201008.csv")
```

## Exploration of DA results

We can start by looking at some basic stats

```{r}
pval_hist <- milo_res %>%
  ggplot(aes(PValue)) + 
  geom_histogram(bins=50) +
  theme_bw(base_size=14)

volcano <- 
  milo_res %>%
  ggplot(aes(logFC, -log10(SpatialFDR))) + 
  geom_point(size=0.4, alpha=0.2) +
  geom_hline(yintercept = -log10(0.1)) +
  xlab("log-Fold Change") +
  theme_bw(base_size=14)

pval_hist + volcano
```

The distribution of P-values looks sensible and from the volcano plot we can see that we have identified some DA neighbourhoods at 10% FDR.
We can visualize DA neighbourhoods building an abstracted graph

```{r, fig.width=14, fig.height=10}
liver_milo <- buildNhoodGraph(liver_milo)
plotNhoodGraphDA(liver_milo, milo_res, alpha = 0.05)
```

Making figures for the manuscript

```{r, fig.width=15, fig.height=10}
colourCount = length(unique(liver_milo$annotation_lineage))
getPalette = colorRampPalette(brewer.pal(9, "Spectral"))

umap_df <- data.frame(reducedDim(liver_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")

umap1 <- cbind(umap_df, annotation_lineage=liver_milo$annotation_lineage) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=as.character(annotation_lineage))) +
  geom_point(size=0.1, alpha=0.5) +
  ggrepel::geom_text_repel(data = . %>% 
              group_by(annotation_lineage) %>% 
              summarise(UMAP_1=mean(UMAP_1), UMAP_2=mean(UMAP_2)),
            aes(label=annotation_lineage), color="black", size=6
            ) +
  scale_color_manual(values=getPalette(colourCount)) +
  guides(color="none") +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  theme_classic(base_size = 22) +
  theme(axis.text = element_blank(), axis.ticks = element_blank(),
        legend.text = element_text(size=18), legend.title = element_text(size=20))

umap2 <-
  cbind(umap_df, condition=as.character(liver_milo$condition)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
  geom_point(size=0.1, alpha=0.5) +
  scale_color_brewer(palette="Set1", name='') +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  guides(color='none') +
  facet_wrap(condition~., ncol=1) +
  theme_nothing() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
        strip.background = element_rect(color=NA), strip.text = element_text(size=16))

nh_graph_pl <- plotNhoodGraphDA(liver_milo, milo_res, alpha = 0.1) +
  theme(legend.text = element_text(size=22), legend.title = element_text(size=24)) +
  coord_fixed()

fig4_top <- (umap1 | umap2 | nh_graph_pl) + 
  plot_layout(widths = c(3,1,3)) 

fig4_top +
  ggsave("~/milo_output/liver_embedding.pdf", width=15, height = 10)
```

Next, we can check the cell types where we observe most differences between healthy and cirrhotic cells, by taking the most frequent cell type in each neighbourhood.

```{r, fig.width=9, fig.height=10}
#' Add annotation of most frequent cell type per nhood to milo results table
add_nhood_coldata_to_res <- function(milo, milo_res, coldata_col){
  nhood_counts <- sapply(seq_along(nhoods(milo)), function(x) table(colData(milo)[as.vector(nhoods(milo)[[x]]), coldata_col]))
  nhood_counts <- t(nhood_counts)
  rownames(nhood_counts) <- seq_along(nhoods(milo))
  max_val <- apply(nhood_counts, 1, function(x) colnames(nhood_counts)[which.max(x)])
  max_frac <- apply(nhood_counts, 1, function(x) max(x)/sum(x))
  milo_res[coldata_col] <- max_val
  milo_res[paste0(coldata_col, "_fraction")] <- max_frac
  return(milo_res)
}

milo_res <- add_nhood_coldata_to_res(liver_milo, milo_res, "annotation_indepth")
milo_res$annotation_lineage.x <- NULL
milo_res$annotation_lineage.y <- NULL
milo_res$annotation_lineage <- NULL
anno_df <- data.frame(liver_milo@colData) %>%
  distinct(annotation_lineage, annotation_indepth)
milo_res <- left_join(milo_res, anno_df, by="annotation_indepth")
```

We first check that neighbourhoods are quite homogeneous

```{r}
frac_hist <- ggplot(milo_res, aes(annotation_indepth_fraction)) +
  geom_histogram(bins=30) +
  xlab("Fraction of cells in \nmost abundant cluster") +
  ylab("# neighbourhoods") +
  theme_bw(base_size=14)

frac_hist
```

I can recover all the clusters where DA was detected in the original paper (see all the barplots for each lineage) and more! All in a single analysis, and without knowing where the subclusters are. Let's bear in mind that positive logFC --> more cirrhotic, negative logFC ---> more healthy

```{r, fig.width=10, fig.height=10}
paper_DA <- list(cirrhotic=c("MPs (4)","MPs (5)",
                             "Endothelia (6)", "Endothelia (7)",
                             "Mes (3)",
                             "Tcells (2)",
                             "Myofibroblasts"
                             ),
                 healthy=c("MPs (7)",
                           "Endothelia (1)",
                           "Tcells (1)", "Tcells (3)","Tcells (1)",
                           "ILCs (1)"
                           )
                 )

expDA_df <- bind_rows(
  data.frame(annotation_indepth = paper_DA[["cirrhotic"]], pred_DA="cirrhotic"),
  data.frame(annotation_indepth = paper_DA[["healthy"]], pred_DA="healthy")
  )

pl1 <- milo_res %>%
  left_join(expDA_df) %>%
  mutate(is_signif = ifelse(SpatialFDR < 0.1, 1, 0)) %>%
  mutate(logFC_color = ifelse(is_signif==1, logFC, NA)) %>%
  arrange(annotation_lineage) %>%
  mutate(Nhood=factor(Nhood, levels=unique(Nhood))) %>%
  ggplot(aes(annotation_indepth, logFC, color=logFC_color)) +
  scale_color_gradient2() +
  guides(color="none") +
  xlab("annotation") + ylab("Log Fold Change") +
  ggbeeswarm::geom_quasirandom(alpha=1) +
  coord_flip() +
  facet_grid(annotation_lineage~., scales="free", space="free") +
  theme_bw(base_size=22) +
  theme(strip.text.y =  element_text(angle=0),
        axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
        )

pl2 <- milo_res %>%
  left_join(expDA_df) %>%
  # dplyr::filter(!is.na(pred_DA)) %>%
  group_by(annotation_indepth) %>%
  summarise(pred_DA=dplyr::first(pred_DA), annotation_lineage=dplyr::first(annotation_lineage)) %>%
  mutate(end=ifelse(pred_DA=="healthy", 0, 1),
         start=ifelse(pred_DA=="healthy", 1, 0)) %>%
  ggplot(aes(annotation_indepth, start, xend = annotation_indepth, yend = end, color=pred_DA)) +
  geom_segment(size=1,arrow=arrow(length = unit(0.1, "npc"), type="closed")) +
  coord_flip() +
  xlab("annotation") +
  facet_grid(annotation_lineage~.,
    # annotation_lineage~"Ramachandran et al.\nDA predictions", 
             scales="free", space="free") +
  # guides(color="none") +
  scale_color_brewer(palette="Set1", direction = -1, 
                     labels=c("enriched in cirrhotic", "enriched in healthy"),
                     na.translate = F,
                     name="Ramachandran et al.\nDA predictions") +
  guides(color=guide_legend(ncol = 1)) +
  theme_bw(base_size=22) +
  ylim(-0.1,1.1) +
  theme(strip.text.y = element_blank(),strip.text.x = element_text(angle=90),
        plot.margin = unit(c(0,0,0,0), "cm"), panel.grid = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        legend.position = "bottom")

fig4_bleft <- (pl2 + pl1 + 
  plot_layout(widths=c(1,10), guides = "collect") & theme(legend.position = 'top', legend.justification = 0)) 

fig4_bleft +
  ggsave("~/milo_output/liver_DAcomparison.pdf", width=8, height = 13)
```

## Close-up on Endothelial lineage

```{r, fig.width=10, fig.height=8}
# endo_milo <- scater::runPCA(endo_milo, subset_row=endo_hvgs, ncomponents=11)
endo_milo <- scater::runUMAP(liver_milo[,liver_milo$annotation_lineage=="Endothelia"],  dimred='PCA')

umap_df <- data.frame(reducedDim(endo_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")

endo_umap <- cbind(umap_df, condition=endo_milo$condition) %>%
   ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
  geom_point(size=0.3, alpha=0.5) +
  scale_color_brewer(palette="Set1", name='') +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  guides(color="none") +
  facet_wrap(condition~., ncol=1) +
  theme_nothing() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
        strip.background = element_rect(color=NA), strip.text = element_text(size=16))

endo_umap
```

```{r, fig.width=8, fig.height=4}
liver_milo2 <- liver_milo
subset.nhoods <- str_detect(milo_res$annotation_indepth, "Endo")
reducedDim(liver_milo2, "UMAP")[colnames(endo_milo),] <- reducedDim(endo_milo, "UMAP") 


endo_gr <-
  plotNhoodGraphDA(
  liver_milo2, milo_res,
  subset.nhoods = subset.nhoods, 
  # subset.nhoods =subset.nhoods[1:(length(subset.nhoods)-1)], 
  alpha = 0.1
  )  +
   theme(legend.text = element_text(size=22), legend.title = element_text(size=24))
  
fig4_bright1 <- ((endo_umap + endo_gr ) + 
  plot_layout(widths = c(1,2), 
                guides = "collect"
                )) 

fig4_bright1 +
  ggsave("~/milo_output/liver_endoGraph.pdf", width=9, height = 5)  
```



### Differential expression between DA neighbourhoods

I want to merge overlapping nhoods with significant DA and the same direction of logFC, and then test for differential expression between cells in up and down nhoods (I guess you could also do up or down VS all the rest). This allows us to perform a comparison without further clustering.

```{r}
endo_milo <- liver_milo[,which(liver_milo$annotation_lineage=="Endothelia")]
endo_res <- dplyr::filter(milo_res, annotation_lineage=="Endothelia")

rownames(endo_res) <- rownames(milo_res)[which(milo_res$annotation_lineage=="Endothelia")]
```


```{r}
dec_liver <- modelGeneVar(liver_milo)
fit_liver <- metadata(dec_liver)
hvgs <- getTopHVGs(dec_liver, n=3000)

## keep HV genes expressed in at least 5% of endothelial cells
endo_hvg_cnts <- counts(liver_milo)[hvgs,liver_milo$annotation_lineage=="Endothelia"]
endo_hvgs <- hvgs[(rowSums(endo_hvg_cnts!=0) > (ncol(endo_hvg_cnts)/100)*5) & (rowSums(endo_hvg_cnts!=0) < (ncol(endo_hvg_cnts)/100)*80)] 
```



```{r}
# rownames(milo_res) <- names(nhoods(liver_milo)) ## If I don't set the nhood index as name the grouping doesn't
nhood_markers <- findNhoodMarkers(liver_milo, milo_res, overlap=1, assay="logcounts", return.groups = TRUE,
                                  subset.row = endo_hvgs,
                                  subset.nhoods = str_detect(milo_res$annotation_indepth, "Endo"))


sum(nhood_markers$dge$adj.P.Val_1 < 0.01)
```

```{r}
marker_genes <- nhood_markers$dge %>%
  dplyr::filter(adj.P.Val_1 < 0.01) %>%
  pull(GeneID)
```

```{r, fig.height=18, fig.width=12}
liver_milo <- calcNhoodExpression(liver_milo, subset.row = marker_genes)

highlight_genes <- c("PLVAP", "SOX14", "VWA1", "ACKR1",
                     "CLEC4G", "CLEC4M", "STAB2", "MRC1",
                     "CD14", "CCL21", "SOX17", "WNT2", "RSPO3", "AIF1L",
                     "PROX1", "PDPN","CPE", "CD320")

fig4_bbright <- plotNhoodExpressionDA(liver_milo, milo_res, marker_genes, cluster_features = TRUE, 
                      alpha = 0.1,
                      scale_to_1 = TRUE,
                      subset.nhoods = milo_res$annotation_lineage=="Endothelia",
                      highlight_features = highlight_genes, show_rownames = FALSE
                      ) +
  ylab("DE genes") +
   theme(legend.text = element_text(size=22), legend.title = element_text(size=24))
```


Customize plotting for paper figure

```{r}
x <- liver_milo
da.res <- milo_res

cluster_features = TRUE
alpha = 0.1
scale_to_1 = TRUE
subset.nhoods = milo_res$annotation_lineage=="Endothelia"
features <- marker_genes

expr_mat <- nhoodExpression(x)[features,]
colnames(expr_mat) <- 1:length(nhoods(x))

## Get nhood expression matrix
if (!is.null(subset.nhoods)) {
  expr_mat <- expr_mat[,subset.nhoods, drop=FALSE]
}

if (!isFALSE(scale_to_1)) {
  expr_mat <- t(apply(expr_mat, 1, function(x) (x - min(x))/(max(x)- min(x))))
}

rownames(expr_mat) <- sub(pattern = "-", replacement = ".", rownames(expr_mat)) ## To avoid problems when converting to data.frame

pl_df <- data.frame(t(expr_mat)) %>%
  rownames_to_column("Nhood") %>%
  mutate(Nhood=as.double(Nhood)) %>%
  left_join(da.res, by="Nhood") %>%
  mutate(logFC_rank=percent_rank(logFC))

## Top plot: nhoods ranked by DA log FC
pl_top <- pl_df %>%
  mutate(is_signif = ifelse(SpatialFDR < alpha, paste0("SpatialFDR < ", alpha), NA)) %>%
  ggplot(aes(logFC_rank, logFC)) +
  geom_hline(yintercept = 0, linetype=2) +
  geom_point(size=0.2, color="grey") +
  geom_point(data=. %>% dplyr::filter(!is.na(is_signif)), aes(color=is_signif), size=1) +
  theme_bw(base_size=22) +
  ylab("DA logFC") +
  scale_color_manual(values="red", name="") +
  guides(color=guide_legend(override.aes = list(size=2))) +
  scale_x_continuous(expand = c(0.01, 0)) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())

## Bottom plot: gene expression heatmap
if (isTRUE(cluster_features)) {
  row.order <- hclust(dist(expr_mat))$order # clustering
  ordered_features <- rownames(expr_mat)[row.order]
} else {
  ordered_features <- rownames(expr_mat)
}


```


```{r, fig.height=12, fig.width=10}
# detach('package:EnsDb.Hsapiens.v86')
# detach('package:ensembldb')

highlight_genes <- c("PLVAP", "SOX14", "VWA1", "ACKR1",
                     "CLEC4G", "CLEC4M", "STAB2", "MRC1",
                     "CD14", "CCL21", "SOX17", "WNT2", "RSPO3", "AIF1L",
                     "PROX1", "PDPN","CPE", "CD320")

pl_bottom <- pl_df %>%
  pivot_longer(cols=rownames(expr_mat), names_to='feature', values_to="avg_expr") %>%
  mutate(feature=factor(feature, levels=ordered_features)) %>%
  mutate(label=ifelse(feature %in% highlight_genes, as.character(feature), NA)) %>%
  ggplot(aes(logFC_rank, feature, fill=avg_expr)) +
  geom_tile() +
  scale_fill_viridis_c(option="magma", name="Scaled \nexpression") +
  xlab("Endothelial Neighbourhoods") + ylab("DE genes") +
  scale_x_continuous(expand = c(0.01, 0),
                     # limits = c(0, max(pl_df$logFC_rank) + 0.2)
                     ) +
  coord_cartesian(clip="off") +
  ggrepel::geom_text_repel(data=. %>% dplyr::filter(!is.na(label)) %>%
              group_by(label) %>%
              summarise(logFC_rank=max(logFC_rank), avg_expr=mean(avg_expr), feature=dplyr::first(feature)),
            aes(label=label, x=logFC_rank), size=7, xlim = c(max(pl_df$logFC_rank) + 0.01, max(pl_df$logFC_rank) + 0.02),
            min.segment.length = 0, seed=42
            ) +
  theme_classic(base_size = 22) +
  theme(axis.text.x = element_blank(), axis.line.x = element_blank(), axis.ticks.x = element_blank(),
        axis.text.y = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank(),
        panel.spacing = margin(2, 2, 2, 2, "cm"),
        legend.margin=margin(0,0,0,0),
        legend.box.margin=margin(10,10,10,10)
        )

## Assemble plot
fig4_bbright <-  (pl_top / pl_bottom) + plot_layout(heights = c(1,4), guides = "collect") & 
    theme(legend.justification=c(0, 1))
fig4_bbright +  ggsave("~/milo_output/liver_DEG_heatmap.pdf", width=9, height = 9)

```

#### Quick GO term analysis

```{r}
# install.packages("enrichR")
library(enrichR)

dbs <- listEnrichrDbs()
websiteLive <- TRUE
if (is.null(dbs)) websiteLive <- FALSE
if (websiteLive) head(dbs)

dbs <- c("GO_Molecular_Function_2015", "GO_Cellular_Component_2015", "GO_Biological_Process_2015")
go_marker_genes <- enrichr(marker_genes, dbs)
go_all_genes <- enrichr(endo_hvgs, dbs)

ref_terms <- go_all_genes$GO_Biological_Process_2015$Term[go_all_genes$GO_Biological_Process_2015$Adjusted.P.value < 0.01]

go_plot <- go_marker_genes$GO_Biological_Process_2015[go_marker_genes$GO_Biological_Process_2015$Adjusted.P.value < 0.01,] %>%
  dplyr::filter(!Term %in% ref_terms) %>%
  top_n(30, -log10(Adjusted.P.value)) %>%
  mutate(Term=factor(Term, levels=rev(unique(Term)))) %>%
  ggplot(aes(Term, -log10(Adjusted.P.value))) +
  geom_point() +
  coord_flip() +
  xlab("GO Biological Function") + ylab("-log10(Adj. p-value)") +
  theme_bw(base_size=14) 

go_plot +
  ggsave("~/milo_output/liver_endoDEG_GOenrich.pdf", width=9, height = 5)
```

Assemble figure
```{r, fig.height=25, fig.width=19}
fig4_top /
  ((fig4_bleft | (fig4_bright1 / fig4_bbright) + plot_layout(heights = c(1,1.5))) +
  plot_layout(widths=c(1,1.2))) +
  plot_layout(heights=c(1,1.5)) +
  ggsave("~/milo_output/fig4.pdf", height = 26, width = 22, useDingbats=FALSE)
  
```



Assemble supplementary figure

```{r, fig.width=9, fig.height=7}
p1 <- plot_grid(pval_hist, 
                volcano + ylab("- log10(Spatial FDR)"), 
                frac_hist, nrow=1,
                labels = c("A", "B", "C")) 
cowplot::plot_grid(p1, go_plot, rel_heights = c(1,1.5), ncol=1, labels = c("", "D")) +
  ggsave("~/milo_output/liver_supplementary.png", height = 7, width=8)
  
```

<!-- Let's check the genes identified as markers for the disease subtypes. Are they significantly differentially expressed between DA neighbourhoods? Yes they are! -->

<!-- ```{r} -->
<!-- disease_endo_markers <- c("ACKR1", 'CD34',"VWA1") -->

<!-- data.frame(markers$negLogFC_2) %>% -->
<!--   filter(FDR < 0.05) %>% -->
<!--   .[disease_endo_markers,] -->

<!-- data.frame(markers$negLogFC_1) %>% -->
<!--   filter(FDR < 0.05) %>% -->
<!--   .[disease_endo_markers,] -->
<!-- ``` -->

<!-- Visualize some of the markers that differentiate DA neighbourhoods, by plotting the percent of cells expressing each gene in a nhood. -->

<!-- ```{r} -->
<!-- ## Define plotting functions -->
<!-- .calculate_nhood_perc_expression <- function(milo, nhoods, gene){ -->
<!--   gene_cnts <- counts(milo)[gene,] -->
<!--   perc_expr <- sapply(nhoods(milo)[nhoods], function(x) sum(gene_cnts[x]>0)/length(x)) -->
<!--   perc_expr <- setNames(perc_expr, nhoods) -->
<!--   return(perc_expr) -->
<!--   } -->

<!-- .plot_nhood_expression <- function(milo, nhoods, features){ -->
<!--   perc_expr_mat <- sapply(features,  -->
<!--                           function(x) .calculate_nhood_perc_expression(milo, nhoods, x)) -->

<!--   pl_df <- data.frame(perc_expr_mat) %>% -->
<!--     rownames_to_column("Nhood") %>% -->
<!--     mutate(Nhood=as.double(Nhood)) %>% -->
<!--     left_join(milo_res) %>% -->
<!--     mutate(logFC_rank=rank(logFC))  -->

<!--   pl_top <- pl_df %>% -->
<!--       mutate(is_signif = ifelse(SpatialFDR < 0.1, "SpatialFDR < 0.1", NA)) %>% -->
<!--       ggplot(aes(logFC_rank, logFC)) + -->
<!--       geom_hline(yintercept = 0, linetype=2) + -->
<!--       geom_point(size=0.2) + -->
<!--       geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=0.5) + -->
<!--       theme_bw() + -->
<!--       scale_color_manual(values="red", name="") + -->
<!--       theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) -->

<!--   pl_bottom <- pl_df %>% -->
<!--     pivot_longer(cols=features, names_to='feature', values_to="perc_expressed") %>% -->
<!--     mutate(feature=factor(feature, levels=features)) %>% -->
<!--     ggplot(aes(logFC_rank, feature, fill=perc_expressed)) +  -->
<!--     geom_tile() + -->
<!--     scale_fill_viridis_c(option="magma") + -->
<!--     ggbio::theme_clear() -->

<!--   (pl_top / pl_bottom) + plot_layout(heights = c(1,2)) -->
<!-- } -->
<!-- ``` -->


<!-- ```{r, fig.width=10, fig.height=12} -->
<!-- endo_nhoods <- endo_res %>%  pull(Nhood) -->

<!-- ## Select genes and sort by AUC -->
<!-- feats_neg2 <- -->
<!--   data.frame(markers$negLogFC_2) %>%  -->
<!--   top_n(50, - log10(FDR)) %>% -->
<!--   arrange(AUC.posLogFC_1) %>% -->
<!--   rownames() -->

<!-- .plot_nhood_expression(liver_milo, endo_nhoods, features=feats) -->
<!-- ``` -->

<!-- As described in the paper, we have that genes associated with extracellular matrix organization (e.g. VIM, ) are over expressed  -->


<!-- ```{r, fig.width=10, fig.height=7} -->
<!-- ## Select genes and sort by AUC -->
<!-- feats_neg1 <- -->
<!--   data.frame(markers$negLogFC_1) %>%  -->
<!--   top_n(50, - log10(FDR)) %>% -->
<!--   arrange(AUC.posLogFC_1) %>% -->
<!--   rownames() -->

<!-- .plot_nhood_expression(liver_milo, endo_nhoods, features=feats) -->

<!-- ``` -->

<!-- ```{r, fig.width=10, fig.height=7} -->
<!-- feats_neg1vsNeg2 <- -->
<!--   data.frame(markers$negLogFC_1) %>%  -->
<!--   top_n(50, - log10(FDR)) %>% -->
<!--   arrange(AUC.negLogFC_2) %>% -->
<!--   rownames() -->

<!-- .plot_nhood_expression(liver_milo, endo_nhoods, features=feats_neg1vsNeg2) -->
<!-- ``` -->


<!-- Look just at Endothelia (5) where you have both positive and negative fold-changes -->

<!-- ```{r} -->
<!-- endo5_nhoods <- milo_res %>%  -->
<!--   filter(annotation_indepth=="Endothelia (5)" & annotation_indepth_fraction > 0.7) %>% -->
<!--   pull(Nhood) -->

<!-- perc_expr_mat <- sapply(features,  -->
<!--                         function(x) .calculate_nhood_perc_expression(liver_milo, endo5_nhoods, x)) -->

<!-- pl_df <- data.frame(perc_expr_mat) %>% -->
<!--   rownames_to_column("Nhood") %>% -->
<!--   mutate(Nhood=as.double(Nhood)) %>% -->
<!--   left_join(milo_res) %>% -->
<!--   mutate(logFC_rank=rank(logFC))  -->

<!-- pl_top <- pl_df %>% -->
<!--     mutate(is_signif = ifelse(SpatialFDR < 0.1, "SpatialFDR < 0.1", NA)) %>% -->
<!--     ggplot(aes(logFC_rank, logFC)) + -->
<!--     geom_hline(yintercept = 0, linetype=2) + -->
<!--     geom_point(size=0.2) + -->
<!--     geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=0.5) + -->
<!--     theme_bw() + -->
<!--     scale_color_manual(values="red", name="") + -->
<!--     theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) -->

<!-- pl_bottom <- pl_df %>% -->
<!--   pivot_longer(cols=features, names_to='feature', values_to="perc_expressed") %>% -->
<!--   ggplot(aes(logFC_rank, feature, fill=perc_expressed)) +  -->
<!--   geom_tile() + -->
<!--   scale_fill_viridis_c(option="magma") + -->
<!--   theme_clear() -->

<!-- (pl_top / pl_bottom) + plot_layout(heights = c(1,2)) -->
<!-- ``` -->

<!-- How to find association de novo in Endo 5? -->

<!-- ```{r, fig.height=8, fig.width=8} -->
<!-- ## Find most highly variable genes in this cluster -->
<!-- endo5_milo <- liver_milo[,which(colData(liver_milo)[["annotation_indepth"]]=="Endothelia (5)")] -->
<!-- dec_endo5 <- modelGeneVar(endo5_milo) -->
<!-- endo5_hvgs <- getTopHVGs(dec_endo5, n=1000) -->

<!-- perc_expr_mat <- sapply(endo5_hvgs, function(x) .calculate_nhood_perc_expression(liver_milo, endo5_nhoods, x)) -->

<!-- milo_res_endo5 <- milo_res[which(milo_res$Nhood %in% endo5_nhoods),] -->

<!-- fc_cor <- apply(perc_expr_mat, 2, function(x) Hmisc::rcorr(x, milo_res_endo5$logFC)$r[1,2]) -->
<!-- fc_cor_pval <- apply(perc_expr_mat, 2, function(x) Hmisc::rcorr(x, milo_res_endo5$logFC)$P[1,2]) -->

<!-- cor_feats <- names(which(abs(fc_cor) > 0.6 & abs(fc_cor_pval) < 0.05)) -->
<!-- cor_feats_ordered <- cor_feats[order(fc_cor[cor_feats])] -->

<!-- pl_df <- data.frame(perc_expr_mat[,cor_feats]) %>% -->
<!--   rownames_to_column("Nhood") %>% -->
<!--   mutate(Nhood=as.double(Nhood)) %>% -->
<!--   left_join(milo_res) %>% -->
<!--   mutate(logFC_rank=rank(logFC))  -->

<!-- pl_top <- pl_df %>% -->
<!--     mutate(is_signif = ifelse(SpatialFDR < 0.1, "SpatialFDR < 0.1", NA)) %>% -->
<!--     ggplot(aes(logFC_rank, logFC)) + -->
<!--     geom_hline(yintercept = 0, linetype=2) + -->
<!--     geom_point(size=0.2) + -->
<!--     geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=0.5) + -->
<!--     theme_bw() + -->
<!--     scale_color_manual(values="red", name="") + -->
<!--     theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) -->

<!-- pl_bottom <- pl_df %>% -->
<!--   pivot_longer(cols=str_replace(cor_feats_ordered, "-", "."), names_to='feature', values_to="perc_expressed") %>% -->
<!--   mutate(feature=factor(feature, levels=str_replace(cor_feats_ordered, "-", "."))) %>% -->
<!--   ggplot(aes(logFC_rank, feature, fill=perc_expressed)) +  -->
<!--   geom_tile() + -->
<!--   scale_fill_viridis_c(option="magma") + -->
<!--   theme_clear() -->

<!-- (pl_top / pl_bottom) + plot_layout(heights = c(1,2)) -->
<!-- ``` -->



<!-- ```{r} -->
<!-- ## Run common PCA -->
<!-- merged_cnts <- cbind(nhoodExpression(endo_milo), logcounts(endo_milo)[hvgs,]) -->
<!-- merged_cnts_scaled <- t(scale(t(merged_cnts))) -->
<!-- merged_pca <- BiocSingular::runPCA(t(merged_cnts_scaled), rank=30, center=FALSE) -->
<!-- pca_mat <- rbind(merged_pca$x[(ncol(endo_milo)+1):(ncol(endo_milo)+(length(nhoods(endo_milo)))),], merged_pca$x[colnames(endo_milo),]) -->
<!-- ## Add to slot nhoodsReducedDim -->
<!-- nhoodReducedDim(endo_milo, "PCA") <- pca_mat -->

<!-- ## Run UMAP on joint PCA -->
<!-- umap_out <- uwot::umap(nhoodReducedDim(endo_milo, "PCA"), n_neighbors = 20, n_components = 2, scale=FALSE) -->
<!-- colnames(umap_out) <- c("UMAP_1", "UMAP_2") -->
<!-- nhoodReducedDim(endo_milo, "UMAP") <- umap_out -->

<!-- ``` -->

<!-- ```{r} -->
<!-- split_by=NULL -->
<!-- ## Join test results and dimensionality reductions -->
<!-- rdim_df <- data.frame(nhoodReducedDim(endo_milo, "UMAP")[,c(1,2)]) -->
<!-- colnames(rdim_df) <- c('X','Y') -->

<!-- n_nhoods <- length(nhoods(endo_milo)) -->
<!-- rdim_df[,"Nhood"] <- ifelse(1:nrow(rdim_df) %in% c(1:n_nhoods), c(1:n_nhoods), NA) -->
<!-- viz_df  <- left_join(rdim_df, milo_res[which(milo_res$annotation_lineage=="Endothelia"),], by="Nhood") -->
<!-- viz_df[["nhIndex"]] <- unlist(ifelse(!is.na(viz_df$Nhood), nhoodIndex(endo_milo)[viz_df$Nhood],NA)) -->
<!-- viz_df[is.na(viz_df["nhIndex"]),'nhIndex'] <- 1:ncol(endo_milo) # Add index also to single-cells -->

<!-- if (!is.null(split_by)){ -->
<!--   split_df <- data.frame(split_by=colData(endo_milo)[,split_by]) -->
<!--   split_df[,"nhIndex"] <- 1:nrow(split_df) -->
<!--   viz_df  <- left_join(viz_df, split_df, by="nhIndex") -->
<!-- } -->

<!-- filter_alpha=0.1 -->
<!-- ## Filter significant DA nhoods -->
<!-- if (!is.null(filter_alpha)) { -->
<!--   if (filter_alpha > 0) { -->
<!--     viz_df <- mutate(viz_df, logFC = ifelse(SpatialFDR > filter_alpha, NA, logFC)) -->
<!--   } -->
<!-- } -->

<!-- ## Plot -->
<!-- pt_size=1 -->
<!-- nhood_reduced_dims="UMAP" -->
<!-- components=c(1,2) -->
<!--   pl <- -->
<!--     ggplot(data = arrange(viz_df, abs(logFC)), -->
<!--            aes(X, Y)) + -->
<!--     geom_point(aes(color = ''), size = pt_size / 3, alpha = 0.5) + -->
<!--     geom_point( -->
<!--       data = . %>% filter(!is.na(SpatialFDR)), -->
<!--       aes(fill = logFC), -->
<!--       size = pt_size, -->
<!--       stroke = 0.1, -->
<!--       # colour="black", -->
<!--       shape = 21 -->
<!--     ) + -->
<!--     scale_fill_gradient2( -->
<!--       midpoint = 0, -->
<!--       high = "red", -->
<!--       low = "blue", -->
<!--       name = "log-FC" -->
<!--     ) + -->
<!--     xlab(paste(nhood_reduced_dims, components[1], sep="_")) + -->
<!--     ylab(paste(nhood_reduced_dims, components[2], sep="_")) -->

<!--   if (!is.null(split_by)) { -->
<!--     pl <- pl + facet_wrap(split_by~.) -->
<!--   } -->
<!--   if (!is.null(filter_alpha)) { -->
<!--     pl <- pl + -->
<!--       scale_color_manual(values = 'grey', label = paste("SpatialFDR >", round(filter_alpha, 2))) + -->
<!--       guides(colour = guide_legend( -->
<!--         '', -->
<!--         override.aes = list( -->
<!--           shape = 21, -->
<!--           colour = "black", -->
<!--           fill = "grey50", -->
<!--           size = pt_size, -->
<!--           alpha = 1, -->
<!--           stroke = 0.1 -->
<!--         ) -->
<!--       )) -->
<!--   } else { -->
<!--     pl <- pl + -->
<!--       scale_color_manual(values = 'grey') + -->
<!--       guides(color="none") -->
<!--   } -->

<!--   pl <- pl + -->
<!--     theme_classic(base_size = 16) + -->
<!--     theme( -->
<!--       axis.ticks = element_blank(), -->
<!--       axis.text = element_blank(), -->
<!--       plot.title = element_text(hjust = 0.5) -->
<!--     ) -->
<!-- pl -->
<!-- ``` -->

<!-- ```{r} -->
<!-- endo_milo <- scater::runPCA(endo_milo, subset_row=hvgs) -->
<!-- ``` -->



<!-- --- -->
<!-- ## Old (before I got dataset from authors) -->

<!-- Using data from [Ramachandran et al. 2019](https://www.nature.com/articles/s41586-019-1631-3#Sec1) (GEO accessiion: GSE136103).  -->

<!-- ```{r} -->
<!-- human_files <- list.files("~/Downloads/GSE136103_RAW", pattern="GSM40411.._healthy|cirrhotic", full.names = TRUE)  -->

<!-- prefixes <- str_remove(human_files, "barcodes.tsv.gz|genes.tsv.gz|matrix.mtx.gz") %>% -->
<!--   # str_remove(".+/") %>% -->
<!--   unique()  -->

<!-- sce_ls <- lapply(prefixes, function(x) read10xCounts(x, type="prefix")) -->
<!-- liver_sce <- purrr::reduce(sce_ls, cbind) -->

<!-- ## Make colData info -->
<!-- new_coldata <- colData(liver_sce) %>% -->
<!--   data.frame() %>% -->
<!--   mutate(Sample=str_remove(Sample, ".+/") %>% str_remove("_$")) %>% -->
<!--   separate(Sample, into=c("col1", "Patient", "Sort"), sep = "_", remove=FALSE) %>%  -->
<!--   mutate(Condition=str_remove(Patient, ".$")) %>% -->
<!--   select(-col1) %>% -->
<!--   mutate(Cell_id = str_c(Sample, "_",Barcode)) %>% -->
<!--   column_to_rownames("Cell_id") -->

<!-- colnames(liver_sce) <- rownames(new_coldata) -->
<!-- colData(liver_sce) <- DataFrame(new_coldata) -->

<!-- # saveRDS(liver_sce, "~/GSE136103_SingleCellExperiment.RDS") -->
<!-- ``` -->
<!-- ```{r} -->
<!-- liver_sce <- readRDS("~/GSE136103_SingleCellExperiment.RDS") -->
<!-- ``` -->

<!-- QC metrics show that the outlier cells are already filtered (following [this](https://osca.bioconductor.org/overview.html#data-processing-and-downstream-analysis)) -->

<!-- ```{r, fig.width=10,fig.height=8} -->
<!-- # is.mito <- grepl("^MT-", rownames(liver_sce)) -->
<!-- qcstats <- perCellQCMetrics(liver_sce) -->

<!-- colData(liver_sce) <- cbind(colData(liver_sce), qcstats) -->

<!-- plotColData(liver_sce, x="Sample", y = "total", other_fields = "Condition") + -->
<!--   scale_y_log10() + -->
<!--   facet_wrap(~Condition, scales = "free") + -->
<!--   ggtitle("total counts")  -->

<!-- plotColData(liver_sce, x="Sample", y = "detected", other_fields = "Condition") + -->
<!--   facet_wrap(~Condition, scales = "free") + -->
<!--   ggtitle("Detected genes")  -->

<!-- ``` -->

<!-- ### Normalization -->

<!-- ```{r} -->
<!-- lib_sf <- librarySizeFactors(liver_sce) -->
<!-- hist(log10(lib_sf), xlab="Log10[Size factor]", col='grey80') -->
<!-- ``` -->
<!-- ```{r} -->
<!-- set.seed(100) -->
<!-- liver_sce <- logNormCounts(liver_sce) -->
<!-- assayNames(liver_sce) -->
<!-- ``` -->

<!-- ### Feature selection -->

<!-- ```{r} -->
<!-- library(scran) -->
<!-- dec_liver <- modelGeneVar(liver_sce) -->

<!-- # Visualizing the fit: -->
<!-- fit_liver <- metadata(dec_liver) -->
<!-- plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression", -->
<!--     ylab="Variance of log-expression") -->

<!-- hvgs <- getTopHVGs(dec_liver, n=3000) -->
<!-- ``` -->



<!-- ### Dim reduction -->

<!-- ```{r, fig.height=14, fig.width=14} -->
<!-- liver_sce <- scater::runPCA(liver_sce, subset_row=hvgs, ncomponents=30) -->
<!-- reducedDim(liver_sce, "PCA") <- reducedDim(liver_sce, "PCA")[,1:11] -->
<!-- plotPCA(liver_sce, colour_by="Condition", ncomponents=3) -->
<!-- ``` -->

<!-- ```{r, fig.height=8, fig.width=8} -->
<!-- liver_sce <- runUMAP(liver_sce, dimred="PCA", ncomponents=2) -->

<!-- scater::plotUMAP(liver_sce, colour_by="Condition", point_alpha=1,  point_size=0.8)  -->
<!-- scater::plotUMAP(liver_sce, colour_by="Sort", point_alpha=0.3,  point_size=0.5) -->
<!-- scater::plotUMAP(liver_sce, colour_by="Patient", point_alpha=0.3,  point_size=0.5) -->
<!-- ``` -->
<!-- ```{r, fig.height=10, fig.width=10} -->
<!-- rownames(liver_sce) <- rowData(liver_sce)$Symbol -->
<!-- scater::plotUMAP(liver_sce, colour_by="CD3D", point_alpha=0.3, point_size=0.5) -->
<!-- ``` -->



